import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.patches as patches
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,r2_score,confusion_matrix,f1_score,precision_score,recall_score
import category_encoders as ce
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import plotly.express as px
df_customer = pd.read_csv("Customer_Behavior_Data.csv",delimiter=';')
df = df_customer
df_customer.head()
| account length | location code | user id | credit card info save | push status | add to wishlist | desktop sessions | app sessions | desktop transactions | total product detail views | session duration | promotion clicks | avg order value | sale product views | discount rate per visited products | product detail view per app session | app transactions | add to cart per session | customer service calls | churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 128 | 415 | 3824657 | no | yes | 25 | 265 | 45 | 17 | 110 | 197 | 87 | 244,7 | 91 | 11,01 | 10 | 3 | 2,7 | 1 | 0 |
| 1 | 107 | 415 | 3717191 | no | yes | 26 | 162 | 27 | 17 | 123 | 196 | 103 | 254,4 | 103 | 11,45 | 13,7 | 3 | 3,7 | 1 | 0 |
| 2 | 137 | 415 | 3581921 | no | no | 0 | 243 | 41 | 10 | 114 | 121 | 110 | 162,6 | 104 | 7,32 | 12,2 | 5 | 3,29 | 0 | 0 |
| 3 | 84 | 408 | 3759999 | yes | no | 0 | 299 | 51 | 5 | 71 | 62 | 88 | 196,9 | 89 | 8,86 | 6,6 | 7 | 1,78 | 2 | 0 |
| 4 | 75 | 415 | 3306626 | yes | no | 0 | 167 | 28 | 13 | 113 | 148 | 122 | 186,9 | 121 | 8,41 | 10,1 | 3 | 2,73 | 3 | 0 |
df_customer.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3333 entries, 0 to 3332 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 account length 3333 non-null int64 1 location code 3333 non-null int64 2 user id 3333 non-null int64 3 credit card info save 3333 non-null object 4 push status 3333 non-null object 5 add to wishlist 3333 non-null int64 6 desktop sessions 3333 non-null int64 7 app sessions 3333 non-null int64 8 desktop transactions 3333 non-null int64 9 total product detail views 3333 non-null int64 10 session duration 3333 non-null int64 11 promotion clicks 3333 non-null int64 12 avg order value 3333 non-null object 13 sale product views 3333 non-null int64 14 discount rate per visited products 3333 non-null object 15 product detail view per app session 3333 non-null object 16 app transactions 3333 non-null int64 17 add to cart per session 3333 non-null object 18 customer service calls 3333 non-null int64 19 churn 3333 non-null int64 dtypes: int64(14), object(6) memory usage: 520.9+ KB
df_customer["avg order value"] = df_customer["avg order value"].apply(lambda x: x.replace(",", "."))
df_customer.head()
| account length | location code | user id | credit card info save | push status | add to wishlist | desktop sessions | app sessions | desktop transactions | total product detail views | session duration | promotion clicks | avg order value | sale product views | discount rate per visited products | product detail view per app session | app transactions | add to cart per session | customer service calls | churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 128 | 415 | 3824657 | no | yes | 25 | 265 | 45 | 17 | 110 | 197 | 87 | 244.7 | 91 | 11,01 | 10 | 3 | 2,7 | 1 | 0 |
| 1 | 107 | 415 | 3717191 | no | yes | 26 | 162 | 27 | 17 | 123 | 196 | 103 | 254.4 | 103 | 11,45 | 13,7 | 3 | 3,7 | 1 | 0 |
| 2 | 137 | 415 | 3581921 | no | no | 0 | 243 | 41 | 10 | 114 | 121 | 110 | 162.6 | 104 | 7,32 | 12,2 | 5 | 3,29 | 0 | 0 |
| 3 | 84 | 408 | 3759999 | yes | no | 0 | 299 | 51 | 5 | 71 | 62 | 88 | 196.9 | 89 | 8,86 | 6,6 | 7 | 1,78 | 2 | 0 |
| 4 | 75 | 415 | 3306626 | yes | no | 0 | 167 | 28 | 13 | 113 | 148 | 122 | 186.9 | 121 | 8,41 | 10,1 | 3 | 2,73 | 3 | 0 |
df_customer['discount rate per visited products']= df_customer['discount rate per visited products'].apply(lambda x: x.replace(",", "."))
df_customer['product detail view per app session']= df_customer['product detail view per app session'].apply(lambda x: x.replace(",", "."))
df_customer['add to cart per session']= df_customer['add to cart per session'].apply(lambda x: x.replace(",", "."))
df_customer.head()
| account length | location code | user id | credit card info save | push status | add to wishlist | desktop sessions | app sessions | desktop transactions | total product detail views | session duration | promotion clicks | avg order value | sale product views | discount rate per visited products | product detail view per app session | app transactions | add to cart per session | customer service calls | churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 128 | 415 | 3824657 | no | yes | 25 | 265 | 45 | 17 | 110 | 197 | 87 | 244.7 | 91 | 11.01 | 10 | 3 | 2.7 | 1 | 0 |
| 1 | 107 | 415 | 3717191 | no | yes | 26 | 162 | 27 | 17 | 123 | 196 | 103 | 254.4 | 103 | 11.45 | 13.7 | 3 | 3.7 | 1 | 0 |
| 2 | 137 | 415 | 3581921 | no | no | 0 | 243 | 41 | 10 | 114 | 121 | 110 | 162.6 | 104 | 7.32 | 12.2 | 5 | 3.29 | 0 | 0 |
| 3 | 84 | 408 | 3759999 | yes | no | 0 | 299 | 51 | 5 | 71 | 62 | 88 | 196.9 | 89 | 8.86 | 6.6 | 7 | 1.78 | 2 | 0 |
| 4 | 75 | 415 | 3306626 | yes | no | 0 | 167 | 28 | 13 | 113 | 148 | 122 | 186.9 | 121 | 8.41 | 10.1 | 3 | 2.73 | 3 | 0 |
df_customer.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3333 entries, 0 to 3332 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 account length 3333 non-null int64 1 location code 3333 non-null int64 2 user id 3333 non-null int64 3 credit card info save 3333 non-null object 4 push status 3333 non-null object 5 add to wishlist 3333 non-null int64 6 desktop sessions 3333 non-null int64 7 app sessions 3333 non-null int64 8 desktop transactions 3333 non-null int64 9 total product detail views 3333 non-null int64 10 session duration 3333 non-null int64 11 promotion clicks 3333 non-null int64 12 avg order value 3333 non-null object 13 sale product views 3333 non-null int64 14 discount rate per visited products 3333 non-null object 15 product detail view per app session 3333 non-null object 16 app transactions 3333 non-null int64 17 add to cart per session 3333 non-null object 18 customer service calls 3333 non-null int64 19 churn 3333 non-null int64 dtypes: int64(14), object(6) memory usage: 520.9+ KB
df_customer['avg order value'] = df_customer['avg order value'].astype(float)
df_customer['discount rate per visited products']= df_customer['discount rate per visited products'].astype(float)
df_customer['product detail view per app session']= df_customer['product detail view per app session'].astype(float)
df_customer['add to cart per session']= df_customer['add to cart per session'].astype(float)
df_customer.describe()
| account length | location code | user id | add to wishlist | desktop sessions | app sessions | desktop transactions | total product detail views | session duration | promotion clicks | avg order value | sale product views | discount rate per visited products | product detail view per app session | app transactions | add to cart per session | customer service calls | churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3333.000000 | 3333.000000 | 3.333000e+03 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 | 3333.000000 |
| mean | 101.064806 | 437.182418 | 3.746291e+06 | 8.099010 | 179.811881 | 30.567957 | 17.087609 | 100.435644 | 201.039604 | 100.110711 | 200.872037 | 100.107711 | 9.039325 | 10.237294 | 4.479448 | 2.764581 | 1.562856 | 0.144914 |
| std | 39.822106 | 42.371290 | 2.746626e+05 | 13.688365 | 54.457135 | 9.269376 | 4.323795 | 20.069084 | 50.714359 | 19.923911 | 50.573847 | 19.568609 | 2.275873 | 2.791840 | 2.461214 | 0.753773 | 1.315491 | 0.352067 |
| min | 1.000000 | 408.000000 | 3.271058e+06 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 23.200000 | 33.000000 | 1.040000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 74.000000 | 408.000000 | 3.508680e+06 | 0.000000 | 144.000000 | 24.000000 | 14.000000 | 87.000000 | 167.000000 | 87.000000 | 167.000000 | 87.000000 | 7.520000 | 8.500000 | 3.000000 | 2.300000 | 1.000000 | 0.000000 |
| 50% | 101.000000 | 415.000000 | 3.748187e+06 | 0.000000 | 179.000000 | 31.000000 | 17.000000 | 101.000000 | 201.000000 | 100.000000 | 201.200000 | 100.000000 | 9.050000 | 10.300000 | 4.000000 | 2.780000 | 1.000000 | 0.000000 |
| 75% | 127.000000 | 510.000000 | 3.985970e+06 | 20.000000 | 216.000000 | 37.000000 | 20.000000 | 114.000000 | 235.000000 | 114.000000 | 235.300000 | 113.000000 | 10.590000 | 12.100000 | 6.000000 | 3.270000 | 2.000000 | 0.000000 |
| max | 243.000000 | 510.000000 | 4.229964e+06 | 51.000000 | 351.000000 | 60.000000 | 31.000000 | 165.000000 | 364.000000 | 170.000000 | 395.000000 | 175.000000 | 17.770000 | 20.000000 | 20.000000 | 5.400000 | 9.000000 | 1.000000 |
df_customer.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3333 entries, 0 to 3332 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 account length 3333 non-null int64 1 location code 3333 non-null int64 2 user id 3333 non-null int64 3 credit card info save 3333 non-null object 4 push status 3333 non-null object 5 add to wishlist 3333 non-null int64 6 desktop sessions 3333 non-null int64 7 app sessions 3333 non-null int64 8 desktop transactions 3333 non-null int64 9 total product detail views 3333 non-null int64 10 session duration 3333 non-null int64 11 promotion clicks 3333 non-null int64 12 avg order value 3333 non-null float64 13 sale product views 3333 non-null int64 14 discount rate per visited products 3333 non-null float64 15 product detail view per app session 3333 non-null float64 16 app transactions 3333 non-null int64 17 add to cart per session 3333 non-null float64 18 customer service calls 3333 non-null int64 19 churn 3333 non-null int64 dtypes: float64(4), int64(14), object(2) memory usage: 520.9+ KB
df_customer.isnull().sum()
account length 0 location code 0 user id 0 credit card info save 0 push status 0 add to wishlist 0 desktop sessions 0 app sessions 0 desktop transactions 0 total product detail views 0 session duration 0 promotion clicks 0 avg order value 0 sale product views 0 discount rate per visited products 0 product detail view per app session 0 app transactions 0 add to cart per session 0 customer service calls 0 churn 0 dtype: int64
def sniff_modified(df):
with pd.option_context("display.max_colwidth", 20):
info = pd.DataFrame()
info['data type'] = df.dtypes
info['percent missing'] = df.isnull().sum()*100/len(df)
info['No. unique'] = df.apply(lambda x: len(x.unique()))
info['unique values'] = df.apply(lambda x: x.unique())
return info.sort_values('data type')
sniff_modified(df_customer)
| data type | percent missing | No. unique | unique values | |
|---|---|---|---|---|
| account length | int64 | 0.0 | 212 | [128, 107, 137, 84, 75, 118, 121, 147, 117, 14... |
| app transactions | int64 | 0.0 | 21 | [3, 5, 7, 6, 4, 2, 9, 19, 1, 10, 15, 8, 11, 0,... |
| sale product views | int64 | 0.0 | 120 | [91, 103, 104, 89, 121, 118, 96, 90, 97, 111, ... |
| promotion clicks | int64 | 0.0 | 123 | [87, 103, 110, 88, 122, 101, 108, 94, 80, 111,... |
| session duration | int64 | 0.0 | 287 | [197, 196, 121, 62, 148, 221, 349, 103, 352, 2... |
| customer service calls | int64 | 0.0 | 10 | [1, 0, 2, 3, 4, 5, 7, 9, 6, 8] |
| desktop transactions | int64 | 0.0 | 30 | [17, 10, 5, 13, 19, 30, 9, 14, 21, 26, 27, 24,... |
| total product detail views | int64 | 0.0 | 119 | [110, 123, 114, 71, 113, 98, 88, 79, 97, 84, 1... |
| desktop sessions | int64 | 0.0 | 295 | [265, 162, 243, 299, 167, 223, 218, 157, 185, ... |
| add to wishlist | int64 | 0.0 | 46 | [25, 26, 0, 24, 37, 27, 33, 39, 30, 41, 28, 34... |
| user id | int64 | 0.0 | 3333 | [3824657, 3717191, 3581921, 3759999, 3306626, ... |
| location code | int64 | 0.0 | 3 | [415, 408, 510] |
| app sessions | int64 | 0.0 | 60 | [45, 27, 41, 51, 28, 38, 37, 31, 44, 22, 32, 2... |
| churn | int64 | 0.0 | 2 | [0, 1] |
| avg order value | float64 | 0.0 | 1591 | [244.7, 254.4, 162.6, 196.9, 186.9, 203.9, 212... |
| discount rate per visited products | float64 | 0.0 | 933 | [11.01, 11.45, 7.32, 8.86, 8.41, 9.18, 9.57, 9... |
| product detail view per app session | float64 | 0.0 | 162 | [10.0, 13.7, 12.2, 6.6, 10.1, 6.3, 7.5, 7.1, 8... |
| add to cart per session | float64 | 0.0 | 162 | [2.7, 3.7, 3.29, 1.78, 2.73, 1.7, 2.03, 1.92, ... |
| push status | object | 0.0 | 2 | [yes, no] |
| credit card info save | object | 0.0 | 2 | [no, yes] |
df_customer.shape
(3333, 20)
df_customer.to_csv('cleaned_customer.csv')
The dataset contains as many unique user IDs as there are total rows, indicating that there are no duplicate values in the dataset.
df_customer.columns
Index(['account length', 'location code', 'user id', 'credit card info save',
'push status', 'add to wishlist', 'desktop sessions', 'app sessions',
'desktop transactions', 'total product detail views',
'session duration', 'promotion clicks', 'avg order value',
'sale product views', 'discount rate per visited products',
'product detail view per app session', 'app transactions',
'add to cart per session', 'customer service calls', 'churn'],
dtype='object')
duplicates = {}
for column in df_customer.columns:
duplicates[column]=df_customer[df_customer.duplicated(column,keep = False)]
duplicates
{'account length': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3317 rows x 20 columns],
'location code': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3333 rows x 20 columns],
'user id': Empty DataFrame
Columns: [account length, location code, user id, credit card info save, push status, add to wishlist, desktop sessions, app sessions, desktop transactions, total product detail views, session duration, promotion clicks, avg order value, sale product views, discount rate per visited products, product detail view per app session, app transactions, add to cart per session, customer service calls, churn]
Index: [],
'credit card info save': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3333 rows x 20 columns],
'push status': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3333 rows x 20 columns],
'add to wishlist': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3329 rows x 20 columns],
'desktop sessions': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
4 75 415 3306626 yes
5 118 510 3918027 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
4 no 0 167 28
5 no 0 223 38
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
4 13 113 148
5 19 98 221
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
4 122 186.9 121
5 101 203.9 118
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
4 8.41 10.1
5 9.18 6.3
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
4 3 2.73 3 0
5 6 1.70 0 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3292 rows x 20 columns],
'app sessions': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3330 rows x 20 columns],
'desktop transactions': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3331 rows x 20 columns],
'total product detail views': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3323 rows x 20 columns],
'session duration': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
4 75 415 3306626 yes
5 118 510 3918027 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
4 no 0 167 28
5 no 0 223 38
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
4 13 113 148
5 19 98 221
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
4 122 186.9 121
5 101 203.9 118
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
4 8.41 10.1
5 9.18 6.3
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
4 3 2.73 3 0
5 6 1.70 0 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3283 rows x 20 columns],
'promotion clicks': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3316 rows x 20 columns],
'avg order value': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3327 79 415 3483830 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3327 no 0 135 23
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3327 16 98 190
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3327 68 221.4 128
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3327 9.96 11.8
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3327 5 3.19 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[2648 rows x 20 columns],
'sale product views': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3322 rows x 20 columns],
'discount rate per visited products': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3097 rows x 20 columns],
'product detail view per app session': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3317 rows x 20 columns],
'app transactions': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3330 rows x 20 columns],
'add to cart per session': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3317 rows x 20 columns],
'customer service calls': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3333 rows x 20 columns],
'churn': account length location code user id credit card info save \
0 128 415 3824657 no
1 107 415 3717191 no
2 137 415 3581921 no
3 84 408 3759999 yes
4 75 415 3306626 yes
... ... ... ... ...
3328 192 415 4144276 no
3329 68 415 3703271 no
3330 28 510 3288230 no
3331 184 510 3646381 yes
3332 74 415 4004344 no
push status add to wishlist desktop sessions app sessions \
0 yes 25 265 45
1 yes 26 162 27
2 no 0 243 41
3 no 0 299 51
4 no 0 167 28
... ... ... ... ...
3328 yes 36 156 27
3329 no 0 231 39
3330 no 0 181 31
3331 no 0 214 36
3332 yes 25 234 40
desktop transactions total product detail views session duration \
0 17 110 197
1 17 123 196
2 10 114 121
3 5 71 62
4 13 113 148
... ... ... ...
3328 18 77 216
3329 13 57 153
3330 25 109 289
3331 14 105 160
3332 23 113 266
promotion clicks avg order value sale product views \
0 87 244.7 91
1 103 254.4 103
2 110 162.6 104
3 88 196.9 89
4 122 186.9 121
... ... ... ...
3328 126 279.1 83
3329 55 191.3 123
3330 58 191.9 91
3331 84 139.2 137
3332 82 241.4 77
discount rate per visited products product detail view per app session \
0 11.01 10.0
1 11.45 13.7
2 7.32 12.2
3 8.86 6.6
4 8.41 10.1
... ... ...
3328 12.56 9.9
3329 8.61 9.6
3330 8.64 14.1
3331 6.26 5.0
3332 10.86 13.7
app transactions add to cart per session customer service calls churn
0 3 2.70 1 0
1 3 3.70 1 0
2 5 3.29 0 0
3 7 1.78 2 0
4 3 2.73 3 0
... ... ... ... ...
3328 6 2.67 2 0
3329 4 2.59 3 0
3330 6 3.81 2 0
3331 10 1.35 2 0
3332 4 3.70 0 0
[3333 rows x 20 columns]}
# # Define the column for which you want to identify outliers
# column_name = 'desktop transactions'
# # Calculate quartiles
# Q1 = df_customer[column_name].quantile(0.25)
# Q3 = df_customer[column_name].quantile(0.75)
# # Calculate IQR
# IQR = Q3 - Q1
# # Define lower and upper bounds for potential outliers
# lower_bound = Q1 - 1.5 * IQR
# upper_bound = Q3 + 1.5 * IQR
# for column_name in df_customer.columns:
# # Identify potential outliers
# potential_outliers = df_customer[column_name][(df_customer[column_name] < lower_bound) | (df_customer[column_name] > upper_bound)]
# # Display potential outliers
# print(f'{column_name}: Number of potential outliers: {potential_outliers}')
#plt.figure(figsize=(8, 6))
#numeric_columns = df_customer.select_dtypes(include=['int64', 'float64'])
fig = px.box(df_customer, y=df_customer['app transactions'], title=f'Box Plot of App Transactions', color_discrete_sequence=['blue'])
fig.update_layout(width=700, height=450)
fig.show()
for column in df_customer.columns:
fig = px.box(df_customer, y=df_customer[column], title=f'Box Plot of {column}', color_discrete_sequence=['blue'])
fig.update_layout(width=700, height=450)
fig.show()
for column in df_customer.columns:
plt.figure(figsize=(8, 6)) # Set the figure size
plt.hist(df_customer[column], bins=10, color='skyblue', edgecolor='black') # Adjust the number of bins as needed
plt.title(f'Histogram of {column}')
plt.xlabel(column)
plt.ylabel('Frequency')
plt.grid(axis='y', alpha=0.75)
plt.show()
All of the numerical variables in the dataset has outliers from a statistical point of view. What is apparent is that all of the outliers represent natural variations as in e-commerce we expect such variations, making the outliers "True outliers". If we had Outliers that we believe them to be data errors we could always use techniques such as removing all data points that are < Q1 - 1.5(IQR) and > Q3 +1.5(IQR) or removing data points that are 3 standard deviations below or above mean
The deviation from the IQR can be ignored in case of e-commerce data since the customers can browse through the website and decide not to add anything to the cart, while some customers on the same hand will add multiple products to the cart, it is highly dependant on the type of customer we are dealing with and hence can have huge variations in the behaviour.
# def remove_outliers_std(df_customer, feature):
# mean = df_customer[feature].mean()
# std = df_customer[feature].std()
# lower_bound = mean - 3 * std
# upper_bound = mean + 3 * std
# df_customer = df_customer[(df_customer[feature] >= lower_bound) & (df_customer[feature] <= upper_bound)]
# return df_customer
# # Remove outliers for each feature using standard deviation
# for feature in df_customer.columns:
# df_customer = remove_outliers_std(df_customer, feature)
# # Print the updated dataset without outliers
# print(df_customer)
churn_count = (df_customer['churn'] == 1).sum()
no_churn_count = (df_customer['churn'] == 0).sum()
labels = 'Customers Left', 'Customers Stayed'
sizes = [churn_count, no_churn_count]
colors_given = ['#FF4433', '#66B3FF']
explode = (0.1, 0) # Pull apart the first slice (Customer Left) by 10%
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, wedgeprops={'edgecolor': 'black'}, colors=colors_given, explode=explode)
ax.axis('equal')
legend_labels = ['Customers Left', 'Customers Stayed']
legend = ax.legend(legend_labels, title="", loc="center left", bbox_to_anchor=(1.1, 1))
legend.get_frame().set_edgecolor('black')
plt.title('Churn Analysis', fontweight='bold')
plt.show()
result = df_customer['app transactions'].value_counts().reset_index()
result.columns = ['app transactions', 'user_count']
# Display the result
print(result)
app transactions user_count 0 3 668 1 4 619 2 2 489 3 5 472 4 6 336 5 7 218 6 1 160 7 8 116 8 9 109 9 10 50 10 11 28 11 0 18 12 12 15 13 13 14 14 15 7 15 14 6 16 18 3 17 16 2 18 19 1 19 20 1 20 17 1
colors = ['blue', 'green', 'red', 'purple']
plt.bar(result['app transactions'], result['user_count'], color=colors)
plt.xlabel('App Transaction')
plt.ylabel('User Count')
plt.title('User Count for Each App Transaction')
plt.show()
cc_info_saved = (df_customer['credit card info save'] == 'yes').sum()
cc_info_not_saved = (df_customer['credit card info save'] =='no').sum()
labels = 'Customers Credit Card Info Saved', 'Customers Credit Card Info Not Saved'
sizes = [cc_info_saved, cc_info_not_saved]
colors_given = ['#FF4433', '#66B3FF']
explode = (0.1, 0) # Pull apart the first slice (Customer Left) by 10%
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, wedgeprops={'edgecolor': 'black'}, colors=colors_given, explode=explode)
ax.axis('equal')
legend_labels = ['Customers Credit Card Info Saved', 'Customers Credit Card Info Not Saved']
legend = ax.legend(legend_labels, title="", loc="center left", bbox_to_anchor=(1.1, 1))
legend.get_frame().set_edgecolor('black')
plt.title('Credit card info Analysis', fontweight='bold')
plt.show()





id_col = ['user id']
target_col = ['churn']
cat_cols = df_customer.nunique()[df_customer.nunique()<4].keys().tolist()
cat_cols
['location code', 'credit card info save', 'push status', 'churn']
cat_cols = [x for x in cat_cols if x not in target_col]
num_cols = [x for x in df_customer.columns if x not in cat_cols + id_col + target_col]
num_cols
['account length', 'add to wishlist', 'desktop sessions', 'app sessions', 'desktop transactions', 'total product detail views', 'session duration', 'promotion clicks', 'avg order value', 'sale product views', 'discount rate per visited products', 'product detail view per app session', 'app transactions', 'add to cart per session', 'customer service calls']
scaler = MinMaxScaler()
scaler.fit(df_customer[num_cols])
df_customer[num_cols] = scaler.transform(df_customer[num_cols])
df_customer.head(20).T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| account length | 0.524793 | 0.438017 | 0.561983 | 0.342975 | 0.305785 | 0.483471 | 0.495868 | 0.603306 | 0.479339 | 0.578512 | 0.264463 | 0.301653 | 0.690083 | 0.38843 | 0.252066 | 0.661157 | 0.347107 | 0.380165 | 0.309917 | 0.297521 |
| location code | 415 | 415 | 415 | 408 | 415 | 510 | 510 | 415 | 408 | 415 | 415 | 415 | 408 | 510 | 415 | 415 | 408 | 510 | 510 | 415 |
| user id | 3824657 | 3717191 | 3581921 | 3759999 | 3306626 | 3918027 | 3559993 | 3299001 | 3354719 | 3308173 | 3296603 | 3449403 | 3631107 | 3948006 | 3669238 | 3517269 | 3508884 | 3862923 | 3562992 | 3732782 |
| credit card info save | no | no | no | yes | yes | yes | no | yes | no | yes | no | no | no | no | no | no | no | no | no | no |
| push status | yes | yes | no | no | no | no | yes | no | no | yes | no | no | no | no | no | no | yes | no | yes | no |
| add to wishlist | 0.490196 | 0.509804 | 0.0 | 0.0 | 0.0 | 0.0 | 0.470588 | 0.0 | 0.0 | 0.72549 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.529412 | 0.0 | 0.647059 | 0.0 |
| desktop sessions | 0.754986 | 0.461538 | 0.692308 | 0.851852 | 0.475783 | 0.635328 | 0.621083 | 0.447293 | 0.527066 | 0.737892 | 0.367521 | 0.535613 | 0.367521 | 0.447293 | 0.344729 | 0.948718 | 0.558405 | 0.54416 | 0.541311 | 0.638177 |
| app sessions | 0.75 | 0.45 | 0.683333 | 0.85 | 0.466667 | 0.633333 | 0.616667 | 0.45 | 0.516667 | 0.733333 | 0.366667 | 0.533333 | 0.366667 | 0.45 | 0.35 | 0.95 | 0.55 | 0.533333 | 0.533333 | 0.633333 |
| desktop transactions | 0.548387 | 0.548387 | 0.322581 | 0.16129 | 0.419355 | 0.612903 | 0.967742 | 0.290323 | 0.967742 | 0.612903 | 0.612903 | 0.451613 | 0.290323 | 0.677419 | 0.83871 | 0.870968 | 0.774194 | 0.612903 | 0.580645 | 0.451613 |
| total product detail views | 0.666667 | 0.745455 | 0.690909 | 0.430303 | 0.684848 | 0.593939 | 0.533333 | 0.478788 | 0.587879 | 0.509091 | 0.830303 | 0.769697 | 0.581818 | 0.533333 | 0.424242 | 0.406061 | 0.842424 | 0.690909 | 0.4 | 0.545455 |
| session duration | 0.541209 | 0.538462 | 0.332418 | 0.17033 | 0.406593 | 0.607143 | 0.958791 | 0.282967 | 0.967033 | 0.60989 | 0.629121 | 0.447802 | 0.288462 | 0.681319 | 0.843407 | 0.873626 | 0.771978 | 0.598901 | 0.585165 | 0.43956 |
| promotion clicks | 0.511765 | 0.605882 | 0.647059 | 0.517647 | 0.717647 | 0.594118 | 0.635294 | 0.552941 | 0.470588 | 0.652941 | 0.488235 | 0.870588 | 0.417647 | 0.441176 | 0.447059 | 0.570588 | 0.529412 | 0.652941 | 0.382353 | 0.517647 |
| avg order value | 0.59575 | 0.62184 | 0.374933 | 0.467187 | 0.44029 | 0.486014 | 0.509414 | 0.507262 | 0.51802 | 0.815492 | 0.499193 | 0.464766 | 0.317106 | 0.454814 | 0.483593 | 0.369554 | 0.177784 | 0.286175 | 0.383271 | 0.456159 |
| sale product views | 0.408451 | 0.492958 | 0.5 | 0.394366 | 0.619718 | 0.598592 | 0.598592 | 0.443662 | 0.401408 | 0.450704 | 0.549296 | 0.429577 | 0.669014 | 0.577465 | 0.464789 | 0.669014 | 0.295775 | 0.619718 | 0.528169 | 0.288732 |
| discount rate per visited products | 0.595935 | 0.622236 | 0.375374 | 0.467424 | 0.440526 | 0.486551 | 0.509863 | 0.507472 | 0.518231 | 0.8159 | 0.499701 | 0.465033 | 0.317394 | 0.454871 | 0.48416 | 0.369994 | 0.178123 | 0.286312 | 0.383742 | 0.456665 |
| product detail view per app session | 0.5 | 0.685 | 0.61 | 0.33 | 0.505 | 0.315 | 0.375 | 0.355 | 0.435 | 0.56 | 0.635 | 0.455 | 0.56 | 0.615 | 0.655 | 0.27 | 0.69 | 0.405 | 0.5 | 0.65 |
| app transactions | 0.15 | 0.15 | 0.25 | 0.35 | 0.15 | 0.3 | 0.35 | 0.3 | 0.2 | 0.25 | 0.3 | 0.25 | 0.1 | 0.25 | 0.3 | 0.45 | 0.2 | 0.15 | 0.25 | 0.1 |
| add to cart per session | 0.5 | 0.685185 | 0.609259 | 0.32963 | 0.505556 | 0.314815 | 0.375926 | 0.355556 | 0.435185 | 0.559259 | 0.635185 | 0.455556 | 0.559259 | 0.614815 | 0.655556 | 0.27037 | 0.690741 | 0.405556 | 0.5 | 0.65 |
| customer service calls | 0.111111 | 0.111111 | 0.0 | 0.222222 | 0.333333 | 0.0 | 0.333333 | 0.0 | 0.111111 | 0.0 | 0.444444 | 0.0 | 0.111111 | 0.333333 | 0.444444 | 0.444444 | 0.111111 | 0.333333 | 0.111111 | 0.111111 |
| churn | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
label_encoder = LabelEncoder()
df_customer[cat_cols]=df_customer[cat_cols].apply(LabelEncoder().fit_transform)
df_customer.head(20).T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| account length | 5.247934e-01 | 4.380165e-01 | 5.619835e-01 | 3.429752e-01 | 3.057851e-01 | 4.834711e-01 | 4.958678e-01 | 6.033058e-01 | 4.793388e-01 | 5.785124e-01 | 2.644628e-01 | 3.016529e-01 | 6.900826e-01 | 3.884298e-01 | 2.520661e-01 | 6.611570e-01 | 3.471074e-01 | 3.801653e-01 | 3.099174e-01 | 2.975207e-01 |
| location code | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 1.000000e+00 | 2.000000e+00 | 2.000000e+00 | 1.000000e+00 | 0.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 2.000000e+00 | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 2.000000e+00 | 2.000000e+00 | 1.000000e+00 |
| user id | 3.824657e+06 | 3.717191e+06 | 3.581921e+06 | 3.759999e+06 | 3.306626e+06 | 3.918027e+06 | 3.559993e+06 | 3.299001e+06 | 3.354719e+06 | 3.308173e+06 | 3.296603e+06 | 3.449403e+06 | 3.631107e+06 | 3.948006e+06 | 3.669238e+06 | 3.517269e+06 | 3.508884e+06 | 3.862923e+06 | 3.562992e+06 | 3.732782e+06 |
| credit card info save | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| push status | 1.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 |
| add to wishlist | 4.901961e-01 | 5.098039e-01 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 4.705882e-01 | 0.000000e+00 | 0.000000e+00 | 7.254902e-01 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 5.294118e-01 | 0.000000e+00 | 6.470588e-01 | 0.000000e+00 |
| desktop sessions | 7.549858e-01 | 4.615385e-01 | 6.923077e-01 | 8.518519e-01 | 4.757835e-01 | 6.353276e-01 | 6.210826e-01 | 4.472934e-01 | 5.270655e-01 | 7.378917e-01 | 3.675214e-01 | 5.356125e-01 | 3.675214e-01 | 4.472934e-01 | 3.447293e-01 | 9.487179e-01 | 5.584046e-01 | 5.441595e-01 | 5.413105e-01 | 6.381766e-01 |
| app sessions | 7.500000e-01 | 4.500000e-01 | 6.833333e-01 | 8.500000e-01 | 4.666667e-01 | 6.333333e-01 | 6.166667e-01 | 4.500000e-01 | 5.166667e-01 | 7.333333e-01 | 3.666667e-01 | 5.333333e-01 | 3.666667e-01 | 4.500000e-01 | 3.500000e-01 | 9.500000e-01 | 5.500000e-01 | 5.333333e-01 | 5.333333e-01 | 6.333333e-01 |
| desktop transactions | 5.483871e-01 | 5.483871e-01 | 3.225806e-01 | 1.612903e-01 | 4.193548e-01 | 6.129032e-01 | 9.677419e-01 | 2.903226e-01 | 9.677419e-01 | 6.129032e-01 | 6.129032e-01 | 4.516129e-01 | 2.903226e-01 | 6.774194e-01 | 8.387097e-01 | 8.709677e-01 | 7.741935e-01 | 6.129032e-01 | 5.806452e-01 | 4.516129e-01 |
| total product detail views | 6.666667e-01 | 7.454545e-01 | 6.909091e-01 | 4.303030e-01 | 6.848485e-01 | 5.939394e-01 | 5.333333e-01 | 4.787879e-01 | 5.878788e-01 | 5.090909e-01 | 8.303030e-01 | 7.696970e-01 | 5.818182e-01 | 5.333333e-01 | 4.242424e-01 | 4.060606e-01 | 8.424242e-01 | 6.909091e-01 | 4.000000e-01 | 5.454545e-01 |
| session duration | 5.412088e-01 | 5.384615e-01 | 3.324176e-01 | 1.703297e-01 | 4.065934e-01 | 6.071429e-01 | 9.587912e-01 | 2.829670e-01 | 9.670330e-01 | 6.098901e-01 | 6.291209e-01 | 4.478022e-01 | 2.884615e-01 | 6.813187e-01 | 8.434066e-01 | 8.736264e-01 | 7.719780e-01 | 5.989011e-01 | 5.851648e-01 | 4.395604e-01 |
| promotion clicks | 5.117647e-01 | 6.058824e-01 | 6.470588e-01 | 5.176471e-01 | 7.176471e-01 | 5.941176e-01 | 6.352941e-01 | 5.529412e-01 | 4.705882e-01 | 6.529412e-01 | 4.882353e-01 | 8.705882e-01 | 4.176471e-01 | 4.411765e-01 | 4.470588e-01 | 5.705882e-01 | 5.294118e-01 | 6.529412e-01 | 3.823529e-01 | 5.176471e-01 |
| avg order value | 5.957504e-01 | 6.218397e-01 | 3.749328e-01 | 4.671867e-01 | 4.402905e-01 | 4.860140e-01 | 5.094137e-01 | 5.072620e-01 | 5.180204e-01 | 8.154922e-01 | 4.991931e-01 | 4.647660e-01 | 3.171060e-01 | 4.548144e-01 | 4.835933e-01 | 3.695535e-01 | 1.777838e-01 | 2.861754e-01 | 3.832706e-01 | 4.561592e-01 |
| sale product views | 4.084507e-01 | 4.929577e-01 | 5.000000e-01 | 3.943662e-01 | 6.197183e-01 | 5.985915e-01 | 5.985915e-01 | 4.436620e-01 | 4.014085e-01 | 4.507042e-01 | 5.492958e-01 | 4.295775e-01 | 6.690141e-01 | 5.774648e-01 | 4.647887e-01 | 6.690141e-01 | 2.957746e-01 | 6.197183e-01 | 5.281690e-01 | 2.887324e-01 |
| discount rate per visited products | 5.959354e-01 | 6.222355e-01 | 3.753736e-01 | 4.674238e-01 | 4.405260e-01 | 4.865511e-01 | 5.098625e-01 | 5.074716e-01 | 5.182307e-01 | 8.158996e-01 | 4.997011e-01 | 4.650329e-01 | 3.173939e-01 | 4.548715e-01 | 4.841602e-01 | 3.699940e-01 | 1.781231e-01 | 2.863120e-01 | 3.837418e-01 | 4.566647e-01 |
| product detail view per app session | 5.000000e-01 | 6.850000e-01 | 6.100000e-01 | 3.300000e-01 | 5.050000e-01 | 3.150000e-01 | 3.750000e-01 | 3.550000e-01 | 4.350000e-01 | 5.600000e-01 | 6.350000e-01 | 4.550000e-01 | 5.600000e-01 | 6.150000e-01 | 6.550000e-01 | 2.700000e-01 | 6.900000e-01 | 4.050000e-01 | 5.000000e-01 | 6.500000e-01 |
| app transactions | 1.500000e-01 | 1.500000e-01 | 2.500000e-01 | 3.500000e-01 | 1.500000e-01 | 3.000000e-01 | 3.500000e-01 | 3.000000e-01 | 2.000000e-01 | 2.500000e-01 | 3.000000e-01 | 2.500000e-01 | 1.000000e-01 | 2.500000e-01 | 3.000000e-01 | 4.500000e-01 | 2.000000e-01 | 1.500000e-01 | 2.500000e-01 | 1.000000e-01 |
| add to cart per session | 5.000000e-01 | 6.851852e-01 | 6.092593e-01 | 3.296296e-01 | 5.055556e-01 | 3.148148e-01 | 3.759259e-01 | 3.555556e-01 | 4.351852e-01 | 5.592593e-01 | 6.351852e-01 | 4.555556e-01 | 5.592593e-01 | 6.148148e-01 | 6.555556e-01 | 2.703704e-01 | 6.907407e-01 | 4.055556e-01 | 5.000000e-01 | 6.500000e-01 |
| customer service calls | 1.111111e-01 | 1.111111e-01 | 0.000000e+00 | 2.222222e-01 | 3.333333e-01 | 0.000000e+00 | 3.333333e-01 | 0.000000e+00 | 1.111111e-01 | 0.000000e+00 | 4.444444e-01 | 0.000000e+00 | 1.111111e-01 | 3.333333e-01 | 4.444444e-01 | 4.444444e-01 | 1.111111e-01 | 3.333333e-01 | 1.111111e-01 | 1.111111e-01 |
| churn | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
X = df_customer.drop(target_col,axis=1)
y=df_customer[target_col]
print(X.shape,y.shape)
(3333, 19) (3333, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
lr = LogisticRegression()
lr.fit(X_train,y_train)
validation_predictions = lr.predict(X_test)
validation_e = accuracy_score(y_test,validation_predictions)
print(f"Accuracy {validation_e*100:.2f}% correct")
lr.predict_proba(X_test)
df_predict = pd.DataFrame(lr.predict_proba(X_test),columns=['Churn','No Churn',])
df_predict['prediction'] = lr.predict(X_test)
print(df_predict)
ax = plt.axes()
df_cm = confusion_matrix(y_test,validation_predictions)
tp, fp, fn, tn = df_cm.ravel()
precision = tp / (tp + fp)
recall = tp / (tp + fn)
# Calculate the F1-score
f1 = 2 * (precision * recall) / (precision + recall)
print(f"F1-Score: {f1 * 100:.2f}%")
print(f"Precision-Score: {precision * 100:.2f}%")
print(f"Recall-Score: {recall * 100:.2f}%")
sns.heatmap(df_cm, annot=True, annot_kws={"size": 20}, fmt='d',cmap="Blues", ax = ax )
ax.set_title('Confusion Matrix')
plt.show()
Accuracy 84.86% correct
Churn No Churn prediction
0 0.852772 0.147228 0
1 0.862889 0.137111 0
2 0.850183 0.149817 0
3 0.840457 0.159543 0
4 0.859420 0.140580 0
.. ... ... ...
662 0.875379 0.124621 0
663 0.835533 0.164467 0
664 0.860065 0.139935 0
665 0.869321 0.130679 0
666 0.858233 0.141767 0
[667 rows x 3 columns]
F1-Score: 91.81%
Precision-Score: 100.00%
Recall-Score: 84.86%
C:\Users\visha\anaconda3\lib\site-packages\sklearn\utils\validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
dt = DecisionTreeClassifier()
dt_model = dt.fit(X_train,y_train)
y_pred_dt = dt.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_dt)
precision = precision_score(y_test, y_pred_dt, average='weighted')
f1 = f1_score(y_test, y_pred_dt)
recall_dt = recall_score(y_test,y_pred_dt)
print(f"F1-Score: {f1*100:.2f}% correct")
print(f"Accuracy: {accuracy*100:.2f}% correct")
print(f"Precision: {precision*100:.2f}% correct")
print(f"Recall: {recall_dt*100:.2f}% correct")
ax = plt.axes()
df_cm = confusion_matrix(y_test,y_pred_dt)
sns.heatmap(df_cm, annot=True, annot_kws={"size": 20}, fmt='d',cmap="Blues", ax = ax )
ax.set_title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
F1-Score: 62.83% correct Accuracy: 89.36% correct Precision: 88.92% correct Recall: 59.41% correct
rf = RandomForestClassifier(n_estimators=100)
rf_model = rf.fit(X_train,y_train)
y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
f1_rf = f1_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test,y_pred_rf)
print(f"F1-Score: {f1_rf*100:.2f}% correct")
print(f"Accuracy: {accuracy_rf*100:.2f}% correct")
print(f"Precision: {precision_rf*100:.2f}% correct")
print(f"Recall: {recall_rf*100:.2f}% correct")
ax = plt.axes()
rf_cm = confusion_matrix(y_test,y_pred_rf)
sns.heatmap(rf_cm, annot=True, annot_kws={"size": 20}, fmt='d',cmap="Blues", ax = ax )
ax.set_title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
C:\Users\visha\AppData\Local\Temp\ipykernel_24772\2612684483.py:3: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
F1-Score: 79.77% correct Accuracy: 94.75% correct Precision: 94.81% correct Recall: 68.32% correct
df_customer.T
df_customer[cat_cols]
df[cat_cols]
from matplotlib import pyplot as plt
from itertools import cycle, islice
import pandas, numpy as np
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'k']), None, len(df['location code'])))
# Group the data by location code and calculate the churn rate
churn_by_location = df.groupby('location code')['churn'].sum()
# Create a bar chart
churn_by_location.plot(kind='bar',stacked=True, color=my_colors)
plt.title('Churn by location code')
plt.xlabel('location code')
plt.ylabel('Churn Rate')
plt.show()
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'k']), None, len(df['credit card info save'])))
# Group the data by Credit Card Info Save and calculate the churn rate
churn_by_location = df.groupby('credit card info save')['churn'].sum()
# Create a bar chart
churn_by_location.plot(kind='bar',stacked=True, color=my_colors)
plt.title('Churn by Credit Card Info Save')
plt.xlabel('credit card info save')
plt.ylabel('Churn Rate')
plt.show()
### push status VS churn
my_colors = list(islice(cycle(['b', 'r', 'g', 'y', 'k']), None, len(df['push status'])))
# Group the data by push status and calculate the churn rate
churn_by_location = df.groupby('push status')['churn'].sum()
# Create a bar chart
churn_by_location.plot(kind='bar',stacked=True, color=my_colors)
plt.title('Churn by push status')
plt.xlabel('push status')
plt.ylabel('Churn Rate')
plt.show()
1. The location codes have a similar percentage of churns, meaning that location might not be a strong indication for churn
2. Customers with push status activated are less likely to be churns, meaning that motivating customers to activate push status might help minimize churns
3. Customers with their credit card information saved on the side are far less likely to be churns than customers without their credit card info saved. This means that if the shop motivates people to save their credit card info might minimize churns.
4. This makes sense since customers who trust their credit card info on a side indicate that they trust the brand. More trust less chances of customers leaving your business.